{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "94712c4b-5f3f-4d93-a9f4-d33620946c99",
   "metadata": {},
   "source": [
    "# Lesson 3: Vectorstores and embeddings"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "00329cd1-0de7-4b69-b474-d9d315d93f6a",
   "metadata": {},
   "source": [
    "# Vectorstore ingestion"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "634988ed-0a57-49bc-9261-dc19fe66529b",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "import \"dotenv/config\";"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b997a07f-bfb0-407b-8959-fc0c467bd30f",
   "metadata": {
    "height": 97
   },
   "outputs": [],
   "source": [
    "import { OpenAIEmbeddings } from \"@langchain/openai\";\n",
    "\n",
    "const embeddings = new OpenAIEmbeddings();\n",
    "\n",
    "await embeddings.embedQuery(\"This is some sample text\");"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "cd470c94-5bec-4efe-8395-416b0a408f8b",
   "metadata": {
    "height": 148
   },
   "outputs": [],
   "source": [
    "import { similarity } from \"ml-distance\";\n",
    "\n",
    "const vector1 = await embeddings.embedQuery(\n",
    "    \"What are vectors useful for in machine learning?\"\n",
    ");\n",
    "const unrelatedVector = await embeddings.embedQuery(\n",
    "    \"A group of parrots is called a pandemonium.\"\n",
    ");"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4476713f-5285-4d2f-9126-d30ee9825090",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": [
    "similarity.cosine(vector1, unrelatedVector);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "17ff1478-261f-4f6d-ae0e-5d9d90df086d",
   "metadata": {
    "height": 97
   },
   "outputs": [],
   "source": [
    "const similarVector = await embeddings.embedQuery(\n",
    "    \"Vectors are representations of information.\"\n",
    ");\n",
    "\n",
    "similarity.cosine(vector1, similarVector);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4f3366a1-4f80-4d4f-b49b-d7223b143d97",
   "metadata": {
    "height": 318
   },
   "outputs": [],
   "source": [
    "// Peer dependency\n",
    "import * as parse from \"pdf-parse\";\n",
    "import { PDFLoader } from \"langchain/document_loaders/fs/pdf\";\n",
    "import { \n",
    "    RecursiveCharacterTextSplitter\n",
    "} from \"langchain/text_splitter\";\n",
    "\n",
    "const loader = new PDFLoader(\"./data/MachineLearning-Lecture01.pdf\");\n",
    "\n",
    "const rawCS229Docs = await loader.load();\n",
    "\n",
    "const splitter = new RecursiveCharacterTextSplitter({\n",
    "  chunkSize: 128,\n",
    "  chunkOverlap: 0,\n",
    "});\n",
    "\n",
    "const splitDocs = await splitter.splitDocuments(rawCS229Docs);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c2581cfc-aec6-4600-860c-1e100d42a382",
   "metadata": {
    "height": 80
   },
   "outputs": [],
   "source": [
    "import { MemoryVectorStore } from \"langchain/vectorstores/memory\";\n",
    "\n",
    "const vectorstore = new MemoryVectorStore(embeddings);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "10a566f9-4c8e-4199-a394-f52baaceac4d",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": [
    "await vectorstore.addDocuments(splitDocs);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6da99c5b-08d1-4f6c-a4dd-aed004b6e023",
   "metadata": {
    "height": 165
   },
   "outputs": [],
   "source": [
    "const retrievedDocs = await vectorstore.similaritySearch(\n",
    "    \"What is deep learning?\", \n",
    "    4\n",
    ");\n",
    "\n",
    "const pageContents = retrievedDocs.map(doc => doc.pageContent);\n",
    "\n",
    "pageContents"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e64cc739-abaf-4e8a-a142-b4cd77abab40",
   "metadata": {},
   "source": [
    "# Retrievers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f5e32044-6e84-470a-91a7-262918d5a22b",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": [
    "const retriever = vectorstore.asRetriever();"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0d678f1d-c37c-462f-96ba-d1c84ab35482",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": [
    "await retriever.invoke(\"What is deep learning?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "18f6254b-5eaf-4867-999b-97d08f96cee9",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c326a7cc-d9be-432c-be10-7846d2b8e5e9",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8b25e21c-28d0-4329-852e-b96a14c38529",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "520b4a9b-89de-4652-b238-b7dddbd5a2c2",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c325c8b5-9292-41f2-ad10-3ad0ad3df182",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f5fe1d4c-9075-47d4-8451-b9bbc1115668",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9cb3853d-826c-4e1f-a413-9f9f0faf62fe",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3dd60bd2-fcc1-4f96-8763-969e30d52c13",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c8f81c9f-88d7-470a-b597-c4137c9d35a4",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c3b2e1ba-0419-4276-bdb4-3b044b3a7f46",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "24bc88b0-851c-4304-a085-ac0cba9f615f",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f35a5209-e820-496b-9abc-60ade9f22d7c",
   "metadata": {
    "height": 29
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Deno",
   "language": "typescript",
   "name": "deno"
  },
  "language_info": {
   "file_extension": ".ts",
   "mimetype": "text/x.typescript",
   "name": "typescript",
   "nb_converter": "script",
   "pygments_lexer": "typescript",
   "version": "5.3.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
